In [ ]:
import pandas as pd

df = pd.read_csv('/Users/qbs/downloads/games.csv')

print(df.head())
     AppID                   Name  Release date Estimated owners  Peak CCU  \
0    20200       Galactic Bowling  Oct 21, 2008        0 - 20000         0   
1   655370           Train Bandit  Oct 12, 2017        0 - 20000         0   
2  1732930           Jolt Project  Nov 17, 2021        0 - 20000         0   
3  1355720               Henosis™  Jul 23, 2020        0 - 20000         0   
4  1139950  Two Weeks in Painland   Feb 3, 2020        0 - 20000         0   

   Required age  Price  DLC count  \
0             0  19.99          0   
1             0   0.99          0   
2             0   4.99          0   
3             0   5.99          0   
4             0   0.00          0   

                                      About the game  \
0  Galactic Bowling is an exaggerated and stylize...   
1  THE LAW!! Looks to be a showdown atop a train....   
2  Jolt Project: The army now has a new robotics ...   
3  HENOSIS™ is a mysterious 2D Platform Puzzler w...   
4  ABOUT THE GAME Play as a hacker who has arrang...   

                                 Supported languages  ...  \
0                                        ['English']  ...   
1  ['English', 'French', 'Italian', 'German', 'Sp...  ...   
2                 ['English', 'Portuguese - Brazil']  ...   
3  ['English', 'French', 'Italian', 'German', 'Sp...  ...   
4                     ['English', 'Spanish - Spain']  ...   

  Average playtime two weeks Median playtime forever  \
0                          0                       0   
1                          0                       0   
2                          0                       0   
3                          0                       0   
4                          0                       0   

  Median playtime two weeks             Developers             Publishers  \
0                         0  Perpetual FX Creative  Perpetual FX Creative   
1                         0           Rusty Moyher           Wild Rooster   
2                         0          Campião Games          Campião Games   
3                         0      Odd Critter Games      Odd Critter Games   
4                         0          Unusual Games          Unusual Games   

                                          Categories  \
0  Single-player,Multi-player,Steam Achievements,...   
1  Single-player,Steam Achievements,Full controll...   
2                                      Single-player   
3              Single-player,Full controller support   
4                   Single-player,Steam Achievements   

                            Genres  \
0              Casual,Indie,Sports   
1                     Action,Indie   
2  Action,Adventure,Indie,Strategy   
3           Adventure,Casual,Indie   
4                  Adventure,Indie   

                                                Tags  \
0                        Indie,Casual,Sports,Bowling   
1  Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...   
2                                                NaN   
3  2D Platformer,Atmospheric,Surreal,Mystery,Puzz...   
4  Indie,Adventure,Nudity,Violent,Sexual Content,...   

                                         Screenshots  \
0  https://cdn.akamai.steamstatic.com/steam/apps/...   
1  https://cdn.akamai.steamstatic.com/steam/apps/...   
2  https://cdn.akamai.steamstatic.com/steam/apps/...   
3  https://cdn.akamai.steamstatic.com/steam/apps/...   
4  https://cdn.akamai.steamstatic.com/steam/apps/...   

                                              Movies  
0  http://cdn.akamai.steamstatic.com/steam/apps/2...  
1  http://cdn.akamai.steamstatic.com/steam/apps/2...  
2  http://cdn.akamai.steamstatic.com/steam/apps/2...  
3  http://cdn.akamai.steamstatic.com/steam/apps/2...  
4  http://cdn.akamai.steamstatic.com/steam/apps/2...  

[5 rows x 39 columns]
In [ ]:
print(df.describe())
              AppID       Peak CCU  Required age         Price     DLC count  \
count  7.171500e+04   71715.000000  71715.000000  71715.000000  71715.000000   
mean   1.199233e+06     140.763160      0.343499      7.223156      0.615394   
std    5.982204e+05    5797.045907      2.362144     11.072095     14.932957   
min    1.000000e+01       0.000000      0.000000      0.000000      0.000000   
25%    7.005500e+05       0.000000      0.000000      0.990000      0.000000   
50%    1.176780e+06       0.000000      0.000000      4.990000      0.000000   
75%    1.692260e+06       1.000000      0.000000      9.990000      0.000000   
max    2.379920e+06  872138.000000     21.000000    999.000000   2366.000000   

           Positive       Negative  Achievements  Recommendations  \
count  7.171500e+04   71715.000000  71715.000000     7.171500e+04   
mean   1.114768e+03     182.118065     21.642278     8.981311e+02   
std    2.652264e+04    4975.240406    185.584866     1.947641e+04   
min    0.000000e+00       0.000000      0.000000     0.000000e+00   
25%    1.000000e+00       0.000000      0.000000     0.000000e+00   
50%    9.000000e+00       3.000000      1.000000     0.000000e+00   
75%    5.900000e+01      18.000000     19.000000     0.000000e+00   
max    5.764420e+06  895978.000000   9821.000000     3.441592e+06   

       Average playtime forever  Average playtime two weeks  \
count              71715.000000                71715.000000   
mean                 119.160371                   11.734853   
std                 1230.111420                  203.819765   
min                    0.000000                    0.000000   
25%                    0.000000                    0.000000   
50%                    0.000000                    0.000000   
75%                    0.000000                    0.000000   
max               145727.000000                19159.000000   

       Median playtime forever  Median playtime two weeks    User_Score  \
count             71715.000000               71715.000000  57665.000000   
mean                106.875228                  12.581008      0.745519   
std                1641.347714                 221.151909      0.243772   
min                   0.000000                   0.000000      0.000000   
25%                   0.000000                   0.000000      0.625000   
50%                   0.000000                   0.000000      0.803109   
75%                   0.000000                   0.000000      0.937500   
max              208473.000000               19159.000000      1.000000   

         score_rank  
count  57665.000000  
mean   27772.955294  
std    17851.977865  
min        1.000000  
25%    14299.000000  
50%    28832.000000  
75%    43008.000000  
max    55820.000000  
In [ ]:
def clean_price(price):
    try:
        return float(price)
    except:
        return None  # or you can use a placeholder like 0 or -1

df['Price'] = df['Price'].apply(clean_price)
In [ ]:
bins = [-1, 0, 10, 1000]  # This means: (-1 to 0], (0 to 10], (10 to 1000]
labels = ['Free', 'Low-cost', 'Expensive']
df['Price_Category'] = pd.cut(df['Price'], bins=bins, labels=labels)
In [ ]:
# Only quantitative columns
df_quantitative = df.select_dtypes(include=['float64', 'int64'])

# Only qualitative columns
df_qualitative = df.select_dtypes(exclude=['float64', 'int64'])
print(df.head())
     AppID                   Name  Release date Estimated owners  Peak CCU  \
0    20200       Galactic Bowling  Oct 21, 2008        0 - 20000         0   
1   655370           Train Bandit  Oct 12, 2017        0 - 20000         0   
2  1732930           Jolt Project  Nov 17, 2021        0 - 20000         0   
3  1355720               Henosis™  Jul 23, 2020        0 - 20000         0   
4  1139950  Two Weeks in Painland   Feb 3, 2020        0 - 20000         0   

   Required age  Price  DLC count  \
0             0  19.99          0   
1             0   0.99          0   
2             0   4.99          0   
3             0   5.99          0   
4             0   0.00          0   

                                      About the game  \
0  Galactic Bowling is an exaggerated and stylize...   
1  THE LAW!! Looks to be a showdown atop a train....   
2  Jolt Project: The army now has a new robotics ...   
3  HENOSIS™ is a mysterious 2D Platform Puzzler w...   
4  ABOUT THE GAME Play as a hacker who has arrang...   

                                 Supported languages  ...  \
0                                        ['English']  ...   
1  ['English', 'French', 'Italian', 'German', 'Sp...  ...   
2                 ['English', 'Portuguese - Brazil']  ...   
3  ['English', 'French', 'Italian', 'German', 'Sp...  ...   
4                     ['English', 'Spanish - Spain']  ...   

  Median playtime forever Median playtime two weeks             Developers  \
0                       0                         0  Perpetual FX Creative   
1                       0                         0           Rusty Moyher   
2                       0                         0          Campião Games   
3                       0                         0      Odd Critter Games   
4                       0                         0          Unusual Games   

              Publishers                                         Categories  \
0  Perpetual FX Creative  Single-player,Multi-player,Steam Achievements,...   
1           Wild Rooster  Single-player,Steam Achievements,Full controll...   
2          Campião Games                                      Single-player   
3      Odd Critter Games              Single-player,Full controller support   
4          Unusual Games                   Single-player,Steam Achievements   

                            Genres  \
0              Casual,Indie,Sports   
1                     Action,Indie   
2  Action,Adventure,Indie,Strategy   
3           Adventure,Casual,Indie   
4                  Adventure,Indie   

                                                Tags  \
0                        Indie,Casual,Sports,Bowling   
1  Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...   
2                                                NaN   
3  2D Platformer,Atmospheric,Surreal,Mystery,Puzz...   
4  Indie,Adventure,Nudity,Violent,Sexual Content,...   

                                         Screenshots  \
0  https://cdn.akamai.steamstatic.com/steam/apps/...   
1  https://cdn.akamai.steamstatic.com/steam/apps/...   
2  https://cdn.akamai.steamstatic.com/steam/apps/...   
3  https://cdn.akamai.steamstatic.com/steam/apps/...   
4  https://cdn.akamai.steamstatic.com/steam/apps/...   

                                              Movies  Price_Category  
0  http://cdn.akamai.steamstatic.com/steam/apps/2...       Expensive  
1  http://cdn.akamai.steamstatic.com/steam/apps/2...        Low-cost  
2  http://cdn.akamai.steamstatic.com/steam/apps/2...        Low-cost  
3  http://cdn.akamai.steamstatic.com/steam/apps/2...        Low-cost  
4  http://cdn.akamai.steamstatic.com/steam/apps/2...            Free  

[5 rows x 40 columns]
In [ ]:
from sklearn.feature_extraction.text import CountVectorizer
# Remove rows with NaN 'Name' values
df = df[df['Name'].notna()]



vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Name'])
df_vectorized = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names_out())

print(df_vectorized.head())
   00  000  001  00111  002  005  006  007  01  011  ...  피랍  학생들의  한국  현명한  \
0   0    0    0      0    0    0    0    0   0    0  ...   0     0   0    0   
1   0    0    0      0    0    0    0    0   0    0  ...   0     0   0    0   
2   0    0    0      0    0    0    0    0   0    0  ...   0     0   0    0   
3   0    0    0      0    0    0    0    0   0    0  ...   0     0   0    0   
4   0    0    0      0    0    0    0    0   0    0  ...   0     0   0    0   

   화이트래빗  회사  흡혈귀의  3豪華限定版  30才の夢追い人  4人打ちアクション麻雀  
0      0   0     0       0         0            0  
1      0   0     0       0         0            0  
2      0   0     0       0         0            0  
3      0   0     0       0         0            0  
4      0   0     0       0         0            0  

[5 rows x 38495 columns]
In [ ]:
#Drop non-useable columns
columns_to_drop = ['Reviews', 'Support url', 'Support email', 'Metacritic score', 
                   'Metacritic url', 'User score', 'Score rank', 'Notes', 'Screenshots', 'Movies']

# Drop the columns
df = df.drop(columns=columns_to_drop, errors='ignore')
In [ ]:
#Handle Missing Values
df['About the game'] = df['About the game'].fillna('No Description')
df['Supported languages'] = df['Supported languages'].fillna('No Supported Languages')
df['Full audio languages'] = df['Full audio languages'].fillna('No Audio Languages')
df['Website'] = df['Website'].fillna('No Websites')
df['Developers'] = df['Developers'].fillna('No Developers')
df['Publishers'] = df['Publishers'].fillna('No Publishers')
df['Categories'] = df['Categories'].fillna('No Categories')
df['Genres'] = df['Genres'].fillna('No Genres')
df['Tags'] = df['Tags'].fillna('No Tags')
df = df.dropna()
In [ ]:
print(df.head())
     AppID                   Name  Release date Estimated owners  Peak CCU  \
0    20200       Galactic Bowling  Oct 21, 2008        0 - 20000         0   
1   655370           Train Bandit  Oct 12, 2017        0 - 20000         0   
2  1732930           Jolt Project  Nov 17, 2021        0 - 20000         0   
3  1355720               Henosis™  Jul 23, 2020        0 - 20000         0   
4  1139950  Two Weeks in Painland   Feb 3, 2020        0 - 20000         0   

   Required age  Price  DLC count  \
0             0  19.99          0   
1             0   0.99          0   
2             0   4.99          0   
3             0   5.99          0   
4             0   0.00          0   

                                      About the game  \
0  Galactic Bowling is an exaggerated and stylize...   
1  THE LAW!! Looks to be a showdown atop a train....   
2  Jolt Project: The army now has a new robotics ...   
3  HENOSIS™ is a mysterious 2D Platform Puzzler w...   
4  ABOUT THE GAME Play as a hacker who has arrang...   

                                 Supported languages  ...  \
0                                        ['English']  ...   
1  ['English', 'French', 'Italian', 'German', 'Sp...  ...   
2                 ['English', 'Portuguese - Brazil']  ...   
3  ['English', 'French', 'Italian', 'German', 'Sp...  ...   
4                     ['English', 'Spanish - Spain']  ...   

  Average playtime forever Average playtime two weeks Median playtime forever  \
0                        0                          0                       0   
1                        0                          0                       0   
2                        0                          0                       0   
3                        0                          0                       0   
4                        0                          0                       0   

   Median playtime two weeks             Developers             Publishers  \
0                          0  Perpetual FX Creative  Perpetual FX Creative   
1                          0           Rusty Moyher           Wild Rooster   
2                          0          Campião Games          Campião Games   
3                          0      Odd Critter Games      Odd Critter Games   
4                          0          Unusual Games          Unusual Games   

                                          Categories  \
0  Single-player,Multi-player,Steam Achievements,...   
1  Single-player,Steam Achievements,Full controll...   
2                                      Single-player   
3              Single-player,Full controller support   
4                   Single-player,Steam Achievements   

                            Genres  \
0              Casual,Indie,Sports   
1                     Action,Indie   
2  Action,Adventure,Indie,Strategy   
3           Adventure,Casual,Indie   
4                  Adventure,Indie   

                                                Tags  Price_Category  
0                        Indie,Casual,Sports,Bowling       Expensive  
1  Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...        Low-cost  
2                                            No Tags        Low-cost  
3  2D Platformer,Atmospheric,Surreal,Mystery,Puzz...        Low-cost  
4  Indie,Adventure,Nudity,Violent,Sexual Content,...            Free  

[5 rows x 30 columns]
In [ ]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(15,7))
sns.countplot(data=df, y='Genres', order=df['Genres'].value_counts().index)
plt.title('Distribution of Game Genres')
plt.xlabel('Number of Games')
plt.show()
In [ ]:
plt.figure(figsize=(15,7))
sns.countplot(data=df, x='Release date')
plt.title('Year-wise Game Releases')
plt.ylabel('Number of Games')
plt.xticks(rotation=45)
plt.show()
In [ ]:
df['User_Score'] = df['Positive']/(df['Positive'] + df['Negative'])
top_rated = df.sort_values(by='User_Score', ascending=False).head(10)  # Assuming you have a column named 'User_Score'
plt.figure(figsize=(15,7))
sns.barplot(data=top_rated, y='Name', x='User_Score', palette='viridis')
plt.title('Top 10 Rated Games')
plt.xlabel('User Score')
plt.show()
/var/folders/00/v47jm2yj45586bq73lrp36nh0000gn/T/ipykernel_55520/2896369396.py:4: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=top_rated, y='Name', x='User_Score', palette='viridis')
/Users/qbs/anaconda3/lib/python3.10/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 36015 (\N{CJK UNIFIED IDEOGRAPH-8CAF}) missing from current font.
  fig.canvas.print_figure(bytes_io, **kw)
/Users/qbs/anaconda3/lib/python3.10/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 27700 (\N{CJK UNIFIED IDEOGRAPH-6C34}) missing from current font.
  fig.canvas.print_figure(bytes_io, **kw)
/Users/qbs/anaconda3/lib/python3.10/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 27133 (\N{CJK UNIFIED IDEOGRAPH-69FD}) missing from current font.
  fig.canvas.print_figure(bytes_io, **kw)
In [ ]:
plt.figure(figsize=(12,7))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()
/var/folders/00/v47jm2yj45586bq73lrp36nh0000gn/T/ipykernel_55520/2899654527.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
In [ ]:
plt.figure(figsize=(14, 6))
df['Categories'].value_counts().head(10).plot(kind='bar', color='lightgreen')
plt.title('Top 10 Categories by Number of Games')
plt.xlabel('Categories')
plt.ylabel('Number of Games')
plt.xticks(rotation=45)
plt.show()
In [ ]:
plt.figure(figsize=(14, 6))
df['Publishers'].value_counts().head(10).plot(kind='bar', color='lightblue')
plt.title('Top 10 Publishers by Number of Games Released')
plt.xlabel('Publisher')
plt.ylabel('Number of Games')
plt.xticks(rotation=45)
plt.show()
In [ ]:
df['score_rank'] = df['User_Score'].rank(ascending=False, method='min')
import matplotlib.pyplot as plt
import seaborn as sns

top_developers = df['Developers'].value_counts().index[:10]

filtered_df = df[df['Developers'].isin(top_developers)]

plt.figure(figsize=(15, 8))
sns.boxplot(x='Developers', y='score_rank', data=filtered_df)
plt.title('Distribution of Score Rank Across Top Developers')
plt.ylabel('Score Rank')
plt.xlabel('Developers')
plt.xticks(rotation=45)
plt.show()
In [ ]:
import matplotlib.pyplot as plt
import seaborn as sns

bins = [0, 10, 20, 30, 40, 50, 60, 100, 200]
labels = ['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-100', '101-200']
df['price_bin'] = pd.cut(df['Price'], bins=bins, labels=labels, right=False)

plt.figure(figsize=(15, 8))
sns.boxplot(x='price_bin', y='score_rank', data=df)
plt.title('Distribution of Score Rank Across Different Price Ranges')
plt.ylabel('Score Rank')
plt.xlabel('Price Range ($)')
plt.xticks(rotation=45)
plt.show()
In [ ]:
plt.figure(figsize=(12, 6))
sns.scatterplot(x='Price', y='score_rank', data=df, alpha=0.5)
plt.title('Scatter Plot between Score Rank and Price')
plt.ylabel('Score Rank')
plt.xlabel('Price ($)')
plt.show()